<<<<<<< HEAD ======= h1.title {font-size: 38px;} h2 {font-size: 30px;} h3 {font-size: 24px;} h4 {font-size: 18px;} h5 {font-size: 16px;} h6 {font-size: 12px;} code {color: inherit; background-color: rgba(0, 0, 0, 0.04);} pre:not([class]) { background-color: white } >>>>>>> dd2acc00e973aafb1c1854d8664edba5dc8bc39d ======= code{white-space: pre-wrap;} span.smallcaps{font-variant: small-caps;} span.underline{text-decoration: underline;} div.column{display: inline-block; vertical-align: top; width: 50%;} div.hanging-indent{margin-left: 1.5em; text-indent: -1.5em;} ul.task-list{list-style: none;} >>>>>>> dd2acc00e973aafb1c1854d8664edba5dc8bc39d
<<<<<<< HEAD

This file is regarding the results for the water stress green house experiment - Group 5

Chapter 1 - First dealing with data

The initial step is the aggregation of all data from the groups (1-5) in a shared Excel file on Google Drive. This document should be downloaded as an excel sheet named “Data”.
=======

This file is regarding the preliminary results for the water stress green house experiment - Group 5

Chapter 1 - First dealing with data

>>>>>>> dd2acc00e973aafb1c1854d8664edba5dc8bc39d
Visualization of data with plots.
# first thing to do is importing the data
# install.packages("readxl")
readxl::read_excel("data_dec/water_stress.xlsx")
## # A tibble: 1,260 × 23
##    Group Week  Date                Species PlantId Use   Treat…¹ Soil_…² Elect…³
##    <chr> <chr> <dttm>              <chr>   <chr>   <chr> <chr>     <dbl>   <dbl>
##  1 G1    W1    2022-10-05 00:00:00 Solanu… Slc1    cult… c          55.6    1.31
##  2 G1    W1    2022-10-05 00:00:00 Solanu… Slc2    cult… c          52.7    1.28
##  3 G1    W1    2022-10-05 00:00:00 Solanu… Slc3    cult… c          61.3    1.24
##  4 G1    W1    2022-10-05 00:00:00 Solanu… Slc4    cult… c          51      1.36
##  5 G1    W1    2022-10-05 00:00:00 Solanu… Slc5    cult… c          52.1    1.39
##  6 G1    W1    2022-10-05 00:00:00 Solanu… Slc6    cult… c          54.4    1.24
##  7 G1    W1    2022-10-05 00:00:00 Solanu… Slc7    cult… c          51.6    1.51
##  8 G1    W1    2022-10-05 00:00:00 Amaran… Arc1    wild  c          54.6    1.68
##  9 G1    W1    2022-10-05 00:00:00 Amaran… Arc2    wild  c          50.9    1.93
## 10 G1    W1    2022-10-05 00:00:00 Amaran… Arc3    wild  c          67.2    1.89
## # … with 1,250 more rows, 14 more variables: Too_dry <chr>, Plant_height <dbl>,
## #   Leaf_number <dbl>, Leaf_length <dbl>, Leaf_width <dbl>, Leaf_area <dbl>,
## #   Chlorophyll_content <dbl>, Aerial_fresh_weight <dbl>,
## #   Aerial_dry_weight <dbl>, Root_length <dbl>, Roots_fresh_weight <dbl>,
## #   Roots_dry_weight <dbl>, Mortality <chr>, Comments <lgl>, and abbreviated
## #   variable names ¹​Treatment, ²​Soil_humidity, ³​Electrical_conductivity
d0 <- readxl::read_excel("data_dec/water_stress.xlsx", sheet = "Data")

after importing the data and give it the name of d0, we are going to visualize it, by creation different plots.

##Visualization of data with plots. Create many plots.

X= Date Y= Variable Y1= Plant height Y2= Leaf number Y3= Leaf lenght Y4= Leaf width Y5= Leaf area Y7= Chlorophyll

# install.packages("ggplot2", dependencies = TRUE)
library(ggplot2)

# first we are going to visualize all the species in a plot
ggplot(d0, aes(x= Date, y= Plant_height, group= PlantId, color= Treatment)) + 
  geom_line()+
  facet_grid(Species ~.)
## Warning: Removed 3 rows containing missing values (`geom_line()`).
<<<<<<< HEAD

=======

>>>>>>> dd2acc00e973aafb1c1854d8664edba5dc8bc39d

in this code chunk we are try to visualize one species, to know how to plot it first

# For Solanum lycopersicum
s1 <- d0[d0$Species=="Solanum lycopersicum",]
ggplot(s1, aes(x= Date, y= Plant_height, group= PlantId, color= Treatment)) + 
  geom_line()
<<<<<<< HEAD

=======

>>>>>>> dd2acc00e973aafb1c1854d8664edba5dc8bc39d

then we will create a for loop to visualize all the species and all the variables

v1 <- c("Plant_height", "Leaf_width", "Leaf_length", "Leaf_area", "Leaf_number", "Root_length", "Chlorophyll_content", "Soil_humidity", "Electrical_conductivity")
i <- "Beta vulgaris"
variable <- "Plant_height"

for(i in levels(as.factor(d0$Species))) {
  for(variable in v1) {
    s1 <- d0[d0$Species==i, c(variable, "Week", "PlantId", "Treatment")]
    s1 <- na.exclude(s1)
    p <- ggplot(s1, aes(x= Week, y= .data[[variable]], group= PlantId, color= Treatment)) + 
      geom_line() + 
      labs(title = i)
    print(p)
  }
}
<<<<<<< HEAD

## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?

## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?

## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?

## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?

## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?

## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?

## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?

## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?

=======

## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?

## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?

## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?

## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?

## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?

## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?

## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?

## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?

>>>>>>> dd2acc00e973aafb1c1854d8664edba5dc8bc39d

## Chapter 2 - ANOVA
in this part we are trying to do an ANOVA analysis for the data


```r
#Packages
#install.packages(c("ggplot2", "ggpubr", "tidyverse", "broom", "AICcmodavg"), , dependencies = TRUE)
library(ggplot2)
library(ggpubr)
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ tibble  3.1.8      ✔ dplyr   1.0.10
<<<<<<< HEAD
## ✔ tidyr   1.2.1      ✔ stringr 1.4.1 
## ✔ readr   2.1.3      ✔ forcats 0.5.2 
## ✔ purrr   0.3.4      
=======
## ✔ tidyr   1.2.1      ✔ stringr 1.5.0 
## ✔ readr   2.1.3      ✔ forcats 0.5.2 
## ✔ purrr   0.3.5      
>>>>>>> dd2acc00e973aafb1c1854d8664edba5dc8bc39d
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
library(broom)

#Import data
d0 <- readxl::read_excel("data_dec/water_stress.xlsx", sheet = "Data")
d0
## # A tibble: 1,260 × 23
##    Group Week  Date                Species PlantId Use   Treat…¹ Soil_…² Elect…³
##    <chr> <chr> <dttm>              <chr>   <chr>   <chr> <chr>     <dbl>   <dbl>
##  1 G1    W1    2022-10-05 00:00:00 Solanu… Slc1    cult… c          55.6    1.31
##  2 G1    W1    2022-10-05 00:00:00 Solanu… Slc2    cult… c          52.7    1.28
##  3 G1    W1    2022-10-05 00:00:00 Solanu… Slc3    cult… c          61.3    1.24
##  4 G1    W1    2022-10-05 00:00:00 Solanu… Slc4    cult… c          51      1.36
##  5 G1    W1    2022-10-05 00:00:00 Solanu… Slc5    cult… c          52.1    1.39
##  6 G1    W1    2022-10-05 00:00:00 Solanu… Slc6    cult… c          54.4    1.24
##  7 G1    W1    2022-10-05 00:00:00 Solanu… Slc7    cult… c          51.6    1.51
##  8 G1    W1    2022-10-05 00:00:00 Amaran… Arc1    wild  c          54.6    1.68
##  9 G1    W1    2022-10-05 00:00:00 Amaran… Arc2    wild  c          50.9    1.93
## 10 G1    W1    2022-10-05 00:00:00 Amaran… Arc3    wild  c          67.2    1.89
## # … with 1,250 more rows, 14 more variables: Too_dry <chr>, Plant_height <dbl>,
## #   Leaf_number <dbl>, Leaf_length <dbl>, Leaf_width <dbl>, Leaf_area <dbl>,
## #   Chlorophyll_content <dbl>, Aerial_fresh_weight <dbl>,
## #   Aerial_dry_weight <dbl>, Root_length <dbl>, Roots_fresh_weight <dbl>,
## #   Roots_dry_weight <dbl>, Mortality <chr>, Comments <lgl>, and abbreviated
## #   variable names ¹​Treatment, ²​Soil_humidity, ³​Electrical_conductivity
names(d0)
##  [1] "Group"                   "Week"                   
##  [3] "Date"                    "Species"                
##  [5] "PlantId"                 "Use"                    
##  [7] "Treatment"               "Soil_humidity"          
##  [9] "Electrical_conductivity" "Too_dry"                
## [11] "Plant_height"            "Leaf_number"            
## [13] "Leaf_length"             "Leaf_width"             
## [15] "Leaf_area"               "Chlorophyll_content"    
## [17] "Aerial_fresh_weight"     "Aerial_dry_weight"      
## [19] "Root_length"             "Roots_fresh_weight"     
## [21] "Roots_dry_weight"        "Mortality"              
## [23] "Comments"
Linear model
Plant Height

Most visual difference is in week 6 (w6)

d1 <- d0[d0$Week == "W6" , c("Treatment", "Species", "Plant_height") ]
d1 <- na.exclude(d1) #remove the ones with empty cells 
lm1 <- lm(Plant_height ~ Treatment + Species, data=d1)
anova(lm1)
## Analysis of Variance Table
## 
## Response: Plant_height
##            Df Sum Sq Mean Sq  F value    Pr(>F)    
## Treatment   2    454   226.9   7.4552 0.0007558 ***
## Species     8  59474  7434.2 244.2441 < 2.2e-16 ***
## Residuals 198   6027    30.4                       
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(lm1)
## 
## Call:
## lm(formula = Plant_height ~ Treatment + Species, data = d1)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -14.8451  -3.1967  -0.2015   2.2747  21.7561 
## 
## Coefficients:
##                             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                  14.9153     1.0076  14.802  < 2e-16 ***
## Treatmenti                   -0.3329     0.9325  -0.357  0.72152    
## Treatments                   -3.3988     0.9361  -3.631  0.00036 ***
## SpeciesBeta vulgaris          3.9708     1.4991   2.649  0.00873 ** 
## SpeciesHordeum vulgare       37.2238     1.4745  25.245  < 2e-16 ***
## SpeciesLolium perenne        26.7429     1.4745  18.137  < 2e-16 ***
## SpeciesPortulacea oleracea   -6.9810     1.4745  -4.734 4.18e-06 ***
## SpeciesRaphanus sativus       6.1143     1.4745   4.147 5.00e-05 ***
## SpeciesSolanum lycopersicum  44.5286     1.4745  30.199  < 2e-16 ***
## SpeciesSonchus oleraceus      4.5286     1.4745   3.071  0.00243 ** 
## SpeciesSpinacia oleracea      1.0048     1.4745   0.681  0.49640    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.517 on 198 degrees of freedom
## Multiple R-squared:  0.9086, Adjusted R-squared:  0.904 
## F-statistic: 196.9 on 10 and 198 DF,  p-value: < 2.2e-16
ggplot(d1, aes(x= Treatment, y= Plant_height, fill= Species)) + 
  geom_boxplot() +
  facet_grid(. ~ Species) +
  theme(strip.text.x = element_text(size=0))
<<<<<<< HEAD

=======

>>>>>>> dd2acc00e973aafb1c1854d8664edba5dc8bc39d
Leaf number
Week 1
d1 <- d0[d0$Week == "W1" , c("Treatment", "Species", "Leaf_number") ]
d1 <- na.exclude(d1) #remove the ones with empty cells 
lm1 <- lm(Leaf_number ~ Treatment + Species, data=d1)
anova(lm1)
## Analysis of Variance Table
## 
## Response: Leaf_number
##            Df Sum Sq Mean Sq F value Pr(>F)    
## Treatment   2   3.27   1.633  1.7577 0.1751    
## Species     8 624.58  78.072 84.0148 <2e-16 ***
## Residuals 199 184.92   0.929                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(d1, aes(x= Treatment, y= Leaf_number, fill= Species)) + 
  geom_boxplot() +
  facet_grid(. ~ Species) +
  theme(strip.text.x = element_text(size=0))
<<<<<<< HEAD

=======

>>>>>>> dd2acc00e973aafb1c1854d8664edba5dc8bc39d
Week 6
d1 <- d0[d0$Week == "W6" , c("Treatment", "Species", "Leaf_number") ]
d1 <- na.exclude(d1) #remove the ones with empty cells 
lm1 <- lm(Leaf_number ~ Treatment + Species, data=d1)
anova(lm1)
## Analysis of Variance Table
## 
## Response: Leaf_number
##            Df Sum Sq Mean Sq F value    Pr(>F)    
## Treatment   2  334.5  167.25  16.033 3.519e-07 ***
## Species     8 3996.6  499.58  47.891 < 2.2e-16 ***
## Residuals 198 2065.5   10.43                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(d1, aes(x= Treatment, y= Leaf_number, fill= Species)) + 
  geom_boxplot() +
  facet_grid(. ~ Species) +
  theme(strip.text.x = element_text(size=0))
<<<<<<< HEAD

=======

>>>>>>> dd2acc00e973aafb1c1854d8664edba5dc8bc39d
Leaf length
Week 1
d1 <- d0[d0$Week == "W1" , c("Treatment", "Species", "Leaf_length") ]
d1 <- na.exclude(d1) #remove the ones with empty cells 
lm1 <- lm(Leaf_length ~ Treatment + Species, data=d1)
anova(lm1)
## Analysis of Variance Table
## 
## Response: Leaf_length
##            Df Sum Sq Mean Sq  F value Pr(>F)    
## Treatment   2   10.4    5.22   1.3734 0.2556    
## Species     8 6314.1  789.27 207.5601 <2e-16 ***
## Residuals 199  756.7    3.80                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(d1, aes(x= Treatment, y= Leaf_length, fill= Species)) + 
  geom_boxplot() +
  facet_grid(. ~ Species) +
  theme(strip.text.x = element_text(size=0))
<<<<<<< HEAD

=======

>>>>>>> dd2acc00e973aafb1c1854d8664edba5dc8bc39d
Week 6
d1 <- d0[d0$Week == "W6" , c("Treatment", "Species", "Leaf_length") ]
d1 <- na.exclude(d1) #remove the ones with empty cells 
lm1 <- lm(Leaf_length ~ Treatment + Species, data=d1)
anova(lm1)
## Analysis of Variance Table
## 
## Response: Leaf_length
##            Df  Sum Sq Mean Sq  F value    Pr(>F)    
## Treatment   2    79.8    39.9   6.1425  0.002581 ** 
## Species     8 30954.5  3869.3 595.6664 < 2.2e-16 ***
## Residuals 198  1286.2     6.5                       
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(d1, aes(x= Treatment, y= Leaf_length, fill= Species)) + 
  geom_boxplot() +
  facet_grid(. ~ Species) +
  theme(strip.text.x = element_text(size=0))
<<<<<<< HEAD

=======

>>>>>>> dd2acc00e973aafb1c1854d8664edba5dc8bc39d
Leaf width
Week 1
d1 <- d0[d0$Week == "W1" , c("Treatment", "Species", "Leaf_width") ]
d1 <- na.exclude(d1) #remove the ones with empty cells 
lm1 <- lm(Leaf_width ~ Treatment + Species, data=d1)
anova(lm1)
## Analysis of Variance Table
## 
## Response: Leaf_width
##            Df Sum Sq Mean Sq  F value  Pr(>F)    
## Treatment   2   2.23   1.113   3.5073 0.03184 *  
## Species     8 708.84  88.605 279.2442 < 2e-16 ***
## Residuals 199  63.14   0.317                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(d1, aes(x= Treatment, y=Leaf_width, fill= Species)) + 
  geom_boxplot() +
  facet_grid(. ~ Species) +
  theme(strip.text.x = element_text(size=0))
<<<<<<< HEAD

=======

>>>>>>> dd2acc00e973aafb1c1854d8664edba5dc8bc39d
Week 6
d1 <- d0[d0$Week == "W6" , c("Treatment", "Species", "Leaf_width") ]
d1 <- na.exclude(d1) #remove the ones with empty cells 
lm1 <- lm(Leaf_width ~ Treatment + Species, data=d1)
anova(lm1)
## Analysis of Variance Table
## 
## Response: Leaf_width
##            Df Sum Sq Mean Sq F value    Pr(>F)    
## Treatment   2   30.0   15.00  11.645 1.654e-05 ***
## Species     8 5014.3  626.78 486.695 < 2.2e-16 ***
## Residuals 198  255.0    1.29                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(d1, aes(x= Treatment, y=Leaf_width, fill= Species)) + 
  geom_boxplot() +
  facet_grid(. ~ Species) +
  theme(strip.text.x = element_text(size=0))
<<<<<<< HEAD

=======

>>>>>>> dd2acc00e973aafb1c1854d8664edba5dc8bc39d
Leaf area
Week 1
d1 <- d0[d0$Week == "W1" , c("Treatment", "Species", "Leaf_area") ]
d1 <- na.exclude(d1) #remove the ones with empty cells 
lm1 <- lm(Leaf_area ~ Treatment + Species, data=d1)
anova(lm1)
## Analysis of Variance Table
## 
## Response: Leaf_area
##            Df  Sum Sq Mean Sq F value    Pr(>F)    
## Treatment   2   202.4  101.19   6.376  0.002137 ** 
## Species     8 10806.2 1350.77  85.111 < 2.2e-16 ***
## Residuals 170  2698.0   15.87                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(d1, aes(x= Treatment, y=Leaf_area, fill= Species)) + 
  geom_boxplot() +
  facet_grid(. ~ Species) +
  theme(strip.text.x = element_text(size=0))
<<<<<<< HEAD

=======

>>>>>>> dd2acc00e973aafb1c1854d8664edba5dc8bc39d
Week 6
d1 <- d0[d0$Week == "W6" , c("Treatment", "Species", "Leaf_area") ]
d1 <- na.exclude(d1) #remove the ones with empty cells 
lm1 <- lm(Leaf_area ~ Treatment + Species, data=d1)
anova(lm1)
## Analysis of Variance Table
## 
## Response: Leaf_area
##            Df Sum Sq Mean Sq F value    Pr(>F)    
## Treatment   2   7915  3957.3  30.502  3.99e-12 ***
## Species     8 150426 18803.3 144.930 < 2.2e-16 ***
## Residuals 179  23223   129.7                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(d1, aes(x= Treatment, y=Leaf_area, fill= Species)) + 
  geom_boxplot() +
  facet_grid(. ~ Species) +
  theme(strip.text.x = element_text(size=0))
<<<<<<< HEAD

=======

>>>>>>> dd2acc00e973aafb1c1854d8664edba5dc8bc39d
Chlorophyll content
Week 3
d1 <- d0[d0$Week == "W3" , c("Treatment", "Species", "Chlorophyll_content") ]
d1 <- na.exclude(d1) #remove the ones with empty cells 
lm1 <- lm(Chlorophyll_content ~ Treatment + Species, data=d1)
anova(lm1)
## Analysis of Variance Table
## 
## Response: Chlorophyll_content
##           Df Sum Sq Mean Sq F value    Pr(>F)    
## Treatment  2 107.26  53.630  6.5679  0.002311 ** 
## Species    3 298.82  99.607 12.1985 1.258e-06 ***
## Residuals 78 636.91   8.166                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(d1, aes(x= Treatment, y=Chlorophyll_content, fill= Species)) + 
  geom_boxplot() +
  facet_grid(. ~ Species) +
  theme(strip.text.x = element_text(size=0))
<<<<<<< HEAD

=======

>>>>>>> dd2acc00e973aafb1c1854d8664edba5dc8bc39d ###### Week 4

d1 <- d0[d0$Week == "W4" , c("Treatment", "Species", "Chlorophyll_content") ]
d1 <- na.exclude(d1) #remove the ones with empty cells 
lm1 <- lm(Chlorophyll_content ~ Treatment + Species, data=d1)
anova(lm1)
## Analysis of Variance Table
## 
## Response: Chlorophyll_content
##            Df  Sum Sq Mean Sq F value    Pr(>F)    
## Treatment   2   15.56   7.780  0.8133    0.4459    
## Species     5  533.87 106.774 11.1623 8.283e-09 ***
## Residuals 117 1119.17   9.566                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(d1, aes(x= Treatment, y=Chlorophyll_content, fill= Species)) + 
  geom_boxplot() +
  facet_grid(. ~ Species) +
  theme(strip.text.x = element_text(size=0))
<<<<<<< HEAD

=======

>>>>>>> dd2acc00e973aafb1c1854d8664edba5dc8bc39d ###### Week 5

d1 <- d0[d0$Week == "W5" , c("Treatment", "Species", "Chlorophyll_content") ]
d1 <- na.exclude(d1) #remove the ones with empty cells 
lm1 <- lm(Chlorophyll_content ~ Treatment + Species, data=d1)
anova(lm1)
## Analysis of Variance Table
## 
## Response: Chlorophyll_content
##            Df  Sum Sq Mean Sq F value Pr(>F)    
## Treatment   2   23.87   11.94  0.8801 0.4168    
## Species     6 2018.13  336.36 24.7988 <2e-16 ***
## Residuals 158 2143.01   13.56                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(d1, aes(x= Treatment, y=Chlorophyll_content, fill= Species)) + 
  geom_boxplot() +
  facet_grid(. ~ Species) +
  theme(strip.text.x = element_text(size=0))
<<<<<<< HEAD

=======

>>>>>>> dd2acc00e973aafb1c1854d8664edba5dc8bc39d ###### Week 6

d1 <- d0[d0$Week == "W6" , c("Treatment", "Species", "Chlorophyll_content") ]
d1 <- na.exclude(d1) #remove the ones with empty cells 
lm1 <- lm(Chlorophyll_content ~ Treatment + Species, data=d1)
anova(lm1)
## Analysis of Variance Table
## 
## Response: Chlorophyll_content
##            Df Sum Sq Mean Sq F value Pr(>F)    
## Treatment   2   20.7   10.36  0.5908 0.5551    
## Species     6 4832.8  805.47 45.9083 <2e-16 ***
## Residuals 158 2772.1   17.55                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(d1, aes(x= Treatment, y=Chlorophyll_content, fill= Species)) + 
  geom_boxplot() +
  facet_grid(. ~ Species) +
  theme(strip.text.x = element_text(size=0))
<<<<<<< HEAD

=======

>>>>>>> dd2acc00e973aafb1c1854d8664edba5dc8bc39d
Aerial fresh weight
d1 <- d0[d0$Week == "W6" , c("Treatment", "Species", "Aerial_fresh_weight") ]
d1 <- na.exclude(d1) #remove the ones with empty cells 
lm1 <- lm(Aerial_fresh_weight ~ Treatment + Species, data=d1)
anova(lm1)
## Analysis of Variance Table
## 
## Response: Aerial_fresh_weight
##            Df Sum Sq Mean Sq F value    Pr(>F)    
## Treatment   2 1673.8  836.91  62.444 < 2.2e-16 ***
## Species     8 5552.5  694.07  51.786 < 2.2e-16 ***
## Residuals 198 2653.7   13.40                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(lm1)
## 
## Call:
## lm(formula = Aerial_fresh_weight ~ Treatment + Species, data = d1)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -9.8499 -2.7699 -0.1789  2.4298 12.7110 
## 
## Coefficients:
##                             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                   4.4232     0.6721   6.581 4.09e-10 ***
## Treatmenti                   -3.8343     0.6211  -6.173 3.71e-09 ***
## Treatments                   -6.9069     0.6188 -11.161  < 2e-16 ***
## SpeciesBeta vulgaris         10.7067     0.9824  10.898  < 2e-16 ***
## SpeciesHordeum vulgare        5.6667     0.9824   5.768 3.05e-08 ***
## SpeciesLolium perenne         1.0167     0.9824   1.035 0.301993    
## SpeciesPortulacea oleracea    0.6653     0.9824   0.677 0.499097    
## SpeciesRaphanus sativus       8.0157     0.9824   8.159 3.84e-14 ***
## SpeciesSolanum lycopersicum  15.2534     0.9824  15.526  < 2e-16 ***
## SpeciesSonchus oleraceus     10.9310     0.9824  11.126  < 2e-16 ***
## SpeciesSpinacia oleracea      3.5238     0.9824   3.587 0.000422 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.661 on 198 degrees of freedom
## Multiple R-squared:  0.7314, Adjusted R-squared:  0.7178 
## F-statistic: 53.92 on 10 and 198 DF,  p-value: < 2.2e-16
ggplot(d1, aes(x= Treatment, y=Aerial_fresh_weight, fill= Species)) + 
  geom_boxplot() +
  facet_grid(. ~ Species) +
  theme(strip.text.x = element_text(size=0))
<<<<<<< HEAD

=======

>>>>>>> dd2acc00e973aafb1c1854d8664edba5dc8bc39d
Aerial dry weight
d1 <- d0[d0$Week == "W6" , c("Treatment", "Species", "Aerial_dry_weight") ]
d1 <- na.exclude(d1) #remove the ones with empty cells 
lm1 <- lm(Aerial_dry_weight ~ Treatment + Species, data=d1)
anova(lm1)
## Analysis of Variance Table
## 
## Response: Aerial_dry_weight
##            Df Sum Sq Mean Sq F value    Pr(>F)    
## Treatment   2  2.499  1.2496  16.463 2.518e-07 ***
## Species     8 53.307  6.6634  87.789 < 2.2e-16 ***
## Residuals 192 14.573  0.0759                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(lm1)
## 
## Call:
## lm(formula = Aerial_dry_weight ~ Treatment + Species, data = d1)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.8084 -0.1337 -0.0454  0.1422  1.3776 
## 
## Coefficients:
##                              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                  0.203660   0.050654   4.021 8.34e-05 ***
## Treatmenti                  -0.087959   0.047789  -1.841   0.0672 .  
## Treatments                  -0.291591   0.047335  -6.160 4.18e-09 ***
## SpeciesBeta vulgaris         0.649612   0.077659   8.365 1.22e-14 ***
## SpeciesHordeum vulgare       0.613333   0.073632   8.330 1.51e-14 ***
## SpeciesLolium perenne        0.080476   0.073632   1.093   0.2758    
## SpeciesPortulacea oleracea  -0.004286   0.073632  -0.058   0.9536    
## SpeciesRaphanus sativus      0.801905   0.073632  10.891  < 2e-16 ***
## SpeciesSolanum lycopersicum  1.464762   0.073632  19.893  < 2e-16 ***
## SpeciesSonchus oleraceus     1.228748   0.079261  15.503  < 2e-16 ***
## SpeciesSpinacia oleracea     0.140476   0.073632   1.908   0.0579 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2755 on 192 degrees of freedom
## Multiple R-squared:  0.7929, Adjusted R-squared:  0.7821 
## F-statistic: 73.52 on 10 and 192 DF,  p-value: < 2.2e-16
ggplot(d1, aes(x= Treatment, y=Aerial_dry_weight, fill= Species)) + 
  geom_boxplot() +
  facet_grid(. ~ Species) +
  theme(strip.text.x = element_text(size=0))
<<<<<<< HEAD

=======

>>>>>>> dd2acc00e973aafb1c1854d8664edba5dc8bc39d
Root length
d1 <- d0[d0$Week == "W6" , c("Treatment", "Species", "Root_length") ]
d1 <- na.exclude(d1) #remove the ones with empty cells 
lm1 <- lm(Root_length ~ Treatment + Species, data=d1)
anova(lm1)
## Analysis of Variance Table
## 
## Response: Root_length
##            Df  Sum Sq Mean Sq F value Pr(>F)    
## Treatment   2   131.2   65.60  1.4067 0.2481    
## Species     7 12935.0 1847.86 39.6256 <2e-16 ***
## Residuals 155  7228.1   46.63                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(lm1)
## 
## Call:
## lm(formula = Root_length ~ Treatment + Species, data = d1)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -13.1485  -3.9889  -0.4197   2.6301  27.3015 
## 
## Coefficients:
##                             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                   4.5300     1.7526   2.585   0.0107 *  
## Treatmenti                   -0.5187     1.2905  -0.402   0.6883    
## Treatments                    0.8026     1.3114   0.612   0.5414    
## SpeciesBeta vulgaris         15.6315     2.1971   7.114 3.90e-11 ***
## SpeciesHordeum vulgare       20.1084     2.1971   9.152 3.07e-16 ***
## SpeciesLolium perenne        10.3789     2.1971   4.724 5.15e-06 ***
## SpeciesPortulacea oleracea   -0.1675     2.1971  -0.076   0.9393    
## SpeciesRaphanus sativus      22.9372     2.1971  10.440  < 2e-16 ***
## SpeciesSolanum lycopersicum  20.3896     2.1971   9.280  < 2e-16 ***
## SpeciesSonchus oleraceus     22.6601     2.1971  10.313  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 6.829 on 155 degrees of freedom
## Multiple R-squared:  0.6438, Adjusted R-squared:  0.6232 
## F-statistic: 31.13 on 9 and 155 DF,  p-value: < 2.2e-16
ggplot(d1, aes(x= Treatment, y=Root_length, fill= Species)) + 
  geom_boxplot() +
  facet_grid(. ~ Species) +
  theme(strip.text.x = element_text(size=0))
<<<<<<< HEAD

=======

>>>>>>> dd2acc00e973aafb1c1854d8664edba5dc8bc39d

part3: PCA analysis

in this part we are going to do a PCA analysis for the data

#importing data

#we already imported the data in the previous parts, that's why the functions of importing the data are commented

#readxl::read_excel("data_dec/water_stress.xlsx")
#d0 <- readxl::read_excel("data_dec/water_stress.xlsx", sheet = "Data")

Next we need to make a subset for the data with only numerical variables and exclude the other variables, scale the data and after that we will exclude all the missing data

PCA_data <- d0[c(8:9, 11:21)]
PCA_data_scaled <- as.data.frame(scale(PCA_data))
view(PCA_data_scaled)

# exclude missing values NA

PCA_data01 <- na.exclude(PCA_data_scaled)

now we are going to do a PCA of the data

PCA <- prcomp(PCA_data01, scale = FALSE)
print(PCA)
## Standard deviations (1, .., p=13):
##  [1] 3.72417071 1.82689971 1.22163374 0.80635464 0.42812078 0.36826198
##  [7] 0.29411880 0.25637763 0.22416993 0.14955205 0.09197022 0.07003550
## [13] 0.05500601
## 
## Rotation (n x k) = (13 x 13):
##                                  PC1           PC2         PC3         PC4
## Soil_humidity           -0.017254267  0.1700524612 -0.06229738  0.37659356
## Electrical_conductivity -0.009083116 -0.0003728194  0.03074676  0.05696844
## Plant_height            -0.302500773  0.4011705238  0.07571629  0.17773513
## Leaf_number              0.383546280  0.1584491520  0.89550212  0.04728623
## Leaf_length             -0.203988882  0.2201324847  0.01792070  0.04563054
## Leaf_width              -0.376722327  0.3584247733  0.13749368  0.18123028
## Leaf_area               -0.406776584  0.1193938737  0.11400102 -0.12319435
## Chlorophyll_content      0.053548438 -0.0083927869  0.11955101 -0.52466043
## Aerial_fresh_weight     -0.304647459  0.0095169900  0.07750988 -0.60740918
## Aerial_dry_weight       -0.299581093  0.1202823281  0.15375918 -0.17423714
## Root_length             -0.234939133 -0.0991357149  0.11158576  0.22920582
## Roots_fresh_weight      -0.366927920 -0.7173142276  0.28542344  0.20062516
## Roots_dry_weight        -0.191697995 -0.2342223964  0.13204067  0.06036467
##                                 PC5         PC6          PC7          PC8
## Soil_humidity           -0.13424503  0.80163704  0.054619942 -0.349430979
## Electrical_conductivity -0.04023364  0.15639691  0.058964791  0.006254404
## Plant_height            -0.15738002 -0.19141869  0.378327653  0.174602577
## Leaf_number              0.13279639  0.01096453  0.002447671 -0.064674801
## Leaf_length             -0.07783896 -0.16330052  0.087459024 -0.240025870
## Leaf_width              -0.20366669 -0.24631698  0.028026619 -0.114076142
## Leaf_area                0.30221451  0.09982567 -0.681340502  0.069971600
## Chlorophyll_content     -0.80385605  0.09581169 -0.168912505 -0.037246649
## Aerial_fresh_weight      0.33545490  0.11467123  0.344152239 -0.495270203
## Aerial_dry_weight        0.04126735  0.39789489  0.080838705  0.646357818
## Root_length             -0.14842516 -0.11140370 -0.415793499 -0.299757085
## Roots_fresh_weight      -0.09667141 -0.01320419  0.189440592 -0.012337067
## Roots_dry_weight        -0.08538508  0.07495060  0.124352297  0.114905812
##                                 PC9        PC10         PC11        PC12
## Soil_humidity            0.10723286  0.02625363  0.161719915 -0.02591782
## Electrical_conductivity  0.06503693 -0.19795904 -0.901126383  0.32809096
## Plant_height             0.23015184  0.58379038 -0.004741431  0.21725552
## Leaf_number              0.01099318  0.02575191  0.011445878 -0.02181855
## Leaf_length              0.10924809  0.02157394 -0.216087955 -0.43394879
## Leaf_width              -0.08954773 -0.68664130  0.160202064  0.03916263
## Leaf_area                0.45640243  0.07055450 -0.029138806 -0.03138557
## Chlorophyll_content      0.13742629  0.04673116  0.020883278  0.02487670
## Aerial_fresh_weight     -0.13745256  0.04877217  0.008387221  0.08335376
## Aerial_dry_weight       -0.40822616 -0.07418062  0.059597955  0.04358086
## Root_length             -0.64986782  0.34689769 -0.090919143  0.14780741
## Roots_fresh_weight       0.26470736 -0.07207278  0.146368621  0.22593645
## Roots_dry_weight        -0.07808654  0.07830177 -0.233897863 -0.75552496
##                                 PC13
## Soil_humidity           -0.018399852
## Electrical_conductivity -0.055153190
## Plant_height            -0.184937225
## Leaf_number              0.017061335
## Leaf_length              0.749755512
## Leaf_width              -0.234986243
## Leaf_area               -0.081054975
## Chlorophyll_content     -0.009653067
## Aerial_fresh_weight     -0.120177840
## Aerial_dry_weight        0.282566160
## Root_length              0.005490618
## Roots_fresh_weight       0.189240668
## Roots_dry_weight        -0.456051781

now we will plot the PCA results

# Plotting the PCA results
# install.packages("factoextra") 
#if(!require(devtools)) install.packages("devtools")
#devtools::install_github("kassambara/factoextra")  

library(factoextra)
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
library(ggplot2)

fviz_eig(PCA)
<<<<<<< HEAD

=======

>>>>>>> dd2acc00e973aafb1c1854d8664edba5dc8bc39d
# graph for individuals



fviz_pca_ind(PCA,
             col.ind = "cos2", 
             gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),
             repel = TRUE     # Avoid text overlapping
)
## Warning: ggrepel: 16 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
<<<<<<< HEAD

=======

>>>>>>> dd2acc00e973aafb1c1854d8664edba5dc8bc39d
# graph of variable


fviz_pca_var(PCA,
             col.var = "contrib", # Color by contributions to the PC
             gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),
             repel = TRUE     # Avoid text overlapping
)
<<<<<<< HEAD

=======

>>>>>>> dd2acc00e973aafb1c1854d8664edba5dc8bc39d